library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggridges)
library(p8105.datasets)

Import the weather data

# Load the weather_df dataset into the environment
data("weather_df")

Making our first plot :-)

Use the + operator to link together your code in ggplot (kind of like the pipe operator but for ggplot)

# Scatterplot of tmax against tmin, with `data` and `mapping` spelled out explicitly
ggplot(data = weather_df, mapping = aes(x = tmin, y = tmax)) + #telling R which data, and which variables
  geom_point()
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

Another way to do the same thing:

# Same scatterplot, but the data frame is piped in as ggplot()'s first argument
weather_df |> 
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point()
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

This is a way to save the plot to your environment (assign it to an object):

# Store the scatterplot in an object; nothing is drawn until the object is printed
ggp_weather_scatterplot <-
  weather_df |>
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point()

# Printing the saved object is how I can see what I just saved
ggp_weather_scatterplot
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

Check which rows have a missing `tmax` (the rows the warnings above say were removed).

# List every observation whose tmax is missing
filter(weather_df, is.na(tmax))
## # A tibble: 17 × 6
##    name         id          date        prcp  tmax  tmin
##    <chr>        <chr>       <date>     <dbl> <dbl> <dbl>
##  1 Molokai_HI   USW00022534 2022-05-31    NA    NA    NA
##  2 Waterhole_WA USS0023B17S 2021-03-09    NA    NA    NA
##  3 Waterhole_WA USS0023B17S 2021-12-07    51    NA    NA
##  4 Waterhole_WA USS0023B17S 2021-12-31     0    NA    NA
##  5 Waterhole_WA USS0023B17S 2022-02-03     0    NA    NA
##  6 Waterhole_WA USS0023B17S 2022-08-09    NA    NA    NA
##  7 Waterhole_WA USS0023B17S 2022-08-10    NA    NA    NA
##  8 Waterhole_WA USS0023B17S 2022-08-11    NA    NA    NA
##  9 Waterhole_WA USS0023B17S 2022-08-12    NA    NA    NA
## 10 Waterhole_WA USS0023B17S 2022-08-13    NA    NA    NA
## 11 Waterhole_WA USS0023B17S 2022-08-14    NA    NA    NA
## 12 Waterhole_WA USS0023B17S 2022-08-15    NA    NA    NA
## 13 Waterhole_WA USS0023B17S 2022-08-16    NA    NA    NA
## 14 Waterhole_WA USS0023B17S 2022-08-17    NA    NA    NA
## 15 Waterhole_WA USS0023B17S 2022-08-18    NA    NA    NA
## 16 Waterhole_WA USS0023B17S 2022-08-19    NA    NA    NA
## 17 Waterhole_WA USS0023B17S 2022-12-31    76    NA    NA

Fancier scatterplots!

alpha ranges from 0-1 (1=totally opaque, 0=totally see-through)

# color is mapped in ggplot(), so every layer inherits it:
# a different color for each name, and one smooth curve per color
weather_df |>
  ggplot(mapping = aes(x = tmin, y = tmax, color = name)) +
  # lower opacity so you can see where the highest density of data lies
  geom_point(alpha = 0.3) +
  # se = FALSE removes the SE band around each curve
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

Where you define aesthetics can matter

# color is mapped inside geom_point() only, so geom_smooth() does not inherit it
weather_df |>
  ggplot(mapping = aes(x = tmin, y = tmax)) +
  # size changes how big the points are (<1 = smaller, >1 = bigger)
  geom_point(mapping = aes(color = name), size = 0.6, alpha = 0.3) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

# The color mapping sits in geom_point(), not in ggplot(), so it is not
# inherited by the layers below: I get one best fit line for the whole plot
# rather than one per name
weather_df |>
  ggplot(mapping = aes(x = tmin, y = tmax)) +
  # size = prcp: bigger points w/ more rain, smaller w/ less rain
  geom_point(mapping = aes(color = name, size = prcp), alpha = 0.3) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_point()`).

Use faceting real quick

# facet_grid() separates the figure into a grid of multiple plots:
# what comes before the `~` defines rows, what comes after defines columns
weather_df |>
  ggplot(mapping = aes(x = tmin, y = tmax)) +
  geom_point(mapping = aes(color = name), size = 0.8, alpha = 0.3) +
  geom_smooth(se = FALSE) +
  facet_grid(. ~ name)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

# name is now before the `~`: 3 rows of plots instead of 3 columns of plots
weather_df |>
  ggplot(mapping = aes(x = tmin, y = tmax)) +
  geom_point(mapping = aes(color = name), size = 0.8, alpha = 0.3) +
  geom_smooth(se = FALSE) +
  facet_grid(name ~ .)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

Let’s make a somewhat more interesting plot:

More info encoded in our graph here

# shape = name gives the points a different shape for each name;
# point size additionally follows prcp
weather_df |>
  ggplot(mapping = aes(x = tmin, y = tmax, color = name, shape = name)) +
  geom_point(mapping = aes(size = prcp), alpha = 0.3) +
  geom_smooth(se = FALSE) +
  facet_grid(. ~ name)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_point()`).

Learning assessment: write a code chain that starts with weather_df; focuses only on Central Park; converts temperatures to Fahrenheit; makes a scatterplot of min vs. max temperature; and overlays a linear regression line (using geom_smooth()).

# Central Park only, with tmin/tmax converted to Fahrenheit before plotting
weather_df |>
  filter(name == "CentralPark_NY") |>
  mutate(
    tmax_fh = tmax * (9 / 5) + 32,
    tmin_fh = tmin * (9 / 5) + 32
  ) |>
  ggplot(aes(x = tmin_fh, y = tmax_fh)) +
  geom_point() +
  # default would be: `geom_smooth()` using method = 'loess' and formula = 'y ~ x'.
  # We changed to `lm` for a linear regression line
  geom_smooth(se = FALSE, method = "lm")
## `geom_smooth()` using formula = 'y ~ x'

Small things

# With the geom_point() layer commented out, I don't have points on my plot,
# only the smooth curves
weather_df |>
  ggplot(mapping = aes(x = tmin, y = tmax, color = name, shape = name)) +
  # geom_point(alpha = 0.3) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_smooth()`).

# Layers are drawn in the order they are added: whatever you want on top goes
# last, so now the points sit on top of the smooth line
weather_df |>
  ggplot(mapping = aes(x = tmin, y = tmax, color = name, shape = name)) +
  geom_smooth(se = FALSE) +
  geom_point()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

Making a cool hexagon plot which shows density of points by color (communicates where most of data is)

# Hexagonal bins colored by how many points fall in each one
weather_df |>
  ggplot(mapping = aes(x = tmin, y = tmax)) +
  geom_hex()
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_binhex()`).

# color is set outside of aes(), so it is not a mapping: I'm just telling R to
# make ALL the points purple (or I can pick any hex code color instead)
weather_df |>
  ggplot(mapping = aes(x = tmin, y = tmax)) +
  geom_point(color = "purple")
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Same idea with a hex color code instead of a color name
weather_df |>
  ggplot(mapping = aes(x = tmin, y = tmax)) +
  geom_point(color = "#F54927")
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

Univariate plots

Creating a histogram of just one variable:

# Histogram of tmin alone; fill and color are fixed values, not mappings
weather_df |>
  ggplot(mapping = aes(x = tmin)) +
  geom_histogram(fill = "red", color = "white")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Mapping color to name in a histogram (compare with fill = name below)
weather_df |>
  ggplot(mapping = aes(x = tmin, color = name)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Mapping fill to name instead of color
weather_df |>
  ggplot(mapping = aes(x = tmin, fill = name)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Splitting into multiple histograms, one row per name
# (better to compare in this case)
weather_df |>
  ggplot(aes(x = tmin, fill = name)) +
  geom_histogram() +
  facet_grid(name ~ .)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_bin()`).

maybe a density plot?

# Overlaid tmin densities, one per name; alpha keeps overlapping areas visible
weather_df |>
  ggplot(mapping = aes(x = tmin, fill = name)) +
  geom_density(alpha = 0.2)
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_density()`).

Trying box plots

# One tmin boxplot per name, filled by name
weather_df |>
  ggplot(mapping = aes(x = name, y = tmin)) +
  geom_boxplot(mapping = aes(fill = name))
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

violin plots

**Note:** Anything you put inside the `aes()` statement is a mapping — for example, a color mapped to each specific name. If you set an aesthetic outside of `aes()`, it applies to everything; it does not differ by name.

# One tmin violin per name; fill is mapped to name inside aes()
weather_df |>
  ggplot(aes(x = name, y = tmin, fill = name)) +
  geom_violin()
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

ridge plot

# Ridge plot: one tmin density per name, stacked along the y axis
weather_df |>
  ggplot(aes(x = tmin, y = name)) +
  geom_density_ridges()
## Picking joint bandwidth of 1.41
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).

LA: univariate plots

Make plots that compare precipitation across locations. Try a histogram, a density plot, a boxplot, a violin plot, and a ridge plot; use aesthetic mappings to make your figures readable.

# Overlaid precipitation densities, one per name
weather_df |>
  ggplot(mapping = aes(x = prcp, fill = name)) +
  geom_density(alpha = 0.2)
## Warning: Removed 15 rows containing non-finite outside the scale range
## (`stat_density()`).

# Precipitation histograms, one row of the grid per name
weather_df |>
  ggplot(mapping = aes(x = prcp, fill = name)) +
  geom_histogram() +
  facet_grid(name ~ .)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 15 rows containing non-finite outside the scale range
## (`stat_bin()`).

#filtering the data to just look at the middle range of data (just remember you did this)
weather_df |> 
  filter(prcp > 5, prcp < 1000) |> 
  ggplot(aes(x = prcp, fill = name)) +
  geom_density(alpha = 0.2)

Saving and embedding plots

Saving plots (goes to environment, does not go to print unless you call its name)

# Store the violin plot in an object; it is not printed until its name is called
ggp_weather_violin <-
  weather_df |>
  ggplot(aes(x = name, y = tmin, fill = name)) +
  geom_violin()

# Calling the object's name prints the plot
ggp_weather_violin
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

# You can save as .pdf, .png, .jpeg, whatever. I can also save to a folder,
# e.g. one named plots, by doing: ggsave("plots/violin_plot.pdf", ...)
ggsave(
  filename = "violin_plot.pdf",
  plot = ggp_weather_violin,
  width = 8,
  height = 6
)
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

embedding plots

(Here I changed the size for this particular figure in the embedded readme)

# Print the saved violin plot again so it is embedded at this point in the document
ggp_weather_violin
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_ydensity()`).